*10_appendix_zeroearnings.do*
*created: Jan 2022
*updated: 6-28-22 by Meredith Welch 
*authors: Meredith Welch
*This code runs regressions with major indicators as independent variables and different ways of 
*measuring status in the earnings sample as outcomes. Modified from updates_jan2022.do 

/** SECTION 0. SET UP **/
*Project directories. outdir and root hold the same project folder; every other
*global below is a sub-folder built from root.
global outdir         "/srv/tier1/projects/180_major/Majors"
global root           "/srv/tier1/projects/180_major/Majors"
global data           "$root/1_data_cleaning/data"
global output_desc    "$root/2_data_analysis/desc_stats"
global output_results "$root/2_data_analysis/results"
global tables         "$output_results/tables"
global plots          "$output_results/plots"

*Close any log that is still open, then start this file's log from scratch
capture log close
log using "$output_results/10_appendix_zeroearnings.log", replace

*Session settings: empty memory, no paging of output, drop empty factor cells,
*matrix size, and fixed seeds for random numbers and for sort tie-breaking
clear all
set more off
set emptycells drop
set matsize 11000
set seed 985321
set sortseed 3512315

*Major indicators used as regressors, one list per institution type.
*The lib indicator is left out of both lists because it is the omitted category.
local majors_4yr "ag_4yr com_4yr it_4yr voc_4yr eng_4yr bio_4yr sci_4yr soc_4yr bus_4yr und_4yr"
local majors_2yr "ag_2yr com_2yr it_2yr voc_2yr eng_2yr bio_2yr sci_2yr soc_2yr bus_2yr educ_2yr und_2yr"

*Control variables added to the regressions
local controls "z_math z_reading rank_90_math rank_7090_math rank_90_reading rank_7090_reading male white hispanic black asian gift atrisk econ_disad"
*The same list separated by ", " so it can be passed as arguments to missing().
*Built from the controls local so the two lists cannot drift apart.
local controls_comma : subinstr local controls " " ", ", all


/** SECTION 1. FOUR YEAR INSTITUTIONS **/ 

*Load data collapsed to one individual
use $data/collapsed_data_4yr.dta, clear

*GENERATE LAST COLLEGE-BY-MAJOR INDICATOR FOR CLUSTERING (SAI - 4/24/2023)
*majors_cluster is the position (1, 2, ...) of the student's major in the
*majors_4yr list; it stays missing when none of the listed indicators equals 1.
*A local counter is used so that no helper macro is left behind after the loop
*(the statement "global drop temp" does not drop a macro: it defines a global
*named drop).
gen majors_cluster = .
local major_id = 1
foreach major of local majors_4yr {
	replace majors_cluster = `major_id' if `major' == 1
	local ++major_id
}

*Cluster id = last 4-year college code * 100 + major position
gen double inst_major = last_col_4yr*100 + majors_cluster
sum inst_major, detail

*ABOUT 700 OBS ARE MISSING A MAJOR - JUST DROP THESE
*NOTE(review): the omitted category is not in the majors_4yr list, so any student
*whose listed indicators are all 0 is dropped here too - confirm that the
*reference group still has observations after this drop.
drop if missing(inst_major)

*Output file (without extension) and the outreg2 options shared by every column
local outfile "$output_results/tables/zeroearnings_analysis_4yr"
local outopts `"excel se noaster rdec(2) bdec(2) sdec(3) r2 append label title("Zero Earnings Spells")"'

*Remove output from a previous run; every outreg2 call below appends
cap erase "`outfile'.txt"
cap erase "`outfile'.xml"

preserve

*Restrict to common sample across specifications - Note that additional differences in samples with same outcome is due to singleton values dropped by reghdfe
drop if missing(`controls_comma', campus, last_col_4yr, cohort) == 1

*Three specifications per outcome, each appended as one column of the table.
*(A dependent-variable-mean row via addstat() is not produced.)
foreach var of varlist qtrs_in_twc qtrs_nonzero_earn qtrs_zero_earn_noenroll twc_drop { 

	*No Controls or FE
	reg `var' `majors_4yr', vce(cluster inst_major)
	outreg2 using "`outfile'", `outopts' keep(`majors_4yr')

	*Controls, no FE 
	reg `var' `majors_4yr' `controls', vce(cluster inst_major)
	outreg2 using "`outfile'", `outopts' addtext(Controls, Yes) keep(`majors_4yr')

	*With HS-Cohort, Post-Sec School-Cohort
	reghdfe `var' `majors_4yr' `controls', absorb(i.last_col_4yr#i.cohort i.campus#i.cohort) vce(cluster inst_major) 
	outreg2 using "`outfile'", `outopts' addtext(Post-Sec + HS FE + Controls, Yes) keep(`majors_4yr')

} 

restore 


/** SECTION 2. TWO YEAR INSTITUTIONS **/ 

*Load data collapsed to one individual
use $data/collapsed_data_2yr.dta, clear

*GENERATE LAST COLLEGE-BY-MAJOR INDICATOR FOR CLUSTERING (SAI - 4/24/2023)
*majors_cluster is the position (1, 2, ...) of the student's major in the
*majors_2yr list; it stays missing when none of the listed indicators equals 1.
*A local counter is used so that no helper macro is left behind after the loop
*(the statement "global drop temp" does not drop a macro: it defines a global
*named drop).
gen majors_cluster = .
local major_id = 1
foreach major of local majors_2yr {
	replace majors_cluster = `major_id' if `major' == 1
	local ++major_id
}

*Cluster id = last 2-year college code * 100 + major position
gen double inst_major = last_col_2yr*100 + majors_cluster
sum inst_major, detail

*ABOUT 20 OBS ARE MISSING A MAJOR - JUST DROP THESE
*NOTE(review): the omitted category is not in the majors_2yr list, so any student
*whose listed indicators are all 0 is dropped here too - confirm that the
*reference group still has observations after this drop.
drop if missing(inst_major)

*Output file (without extension) and the outreg2 options shared by every column
local outfile "$output_results/tables/zeroearnings_analysis_2yr"
local outopts `"excel se noaster rdec(2) bdec(2) sdec(3) r2 append label title("Zero Earnings Spells")"'

*Remove output from a previous run; every outreg2 call below appends
cap erase "`outfile'.txt"
cap erase "`outfile'.xml"

preserve

*Restrict to common sample across specifications - Note that additional differences in samples with same outcome is due to singleton values dropped by reghdfe
drop if missing(`controls_comma', campus, last_col_2yr, cohort) == 1

*Three specifications per outcome, each appended as one column of the table.
*(A dependent-variable-mean row via addstat() is not produced.)
foreach var of varlist qtrs_in_twc qtrs_nonzero_earn qtrs_zero_earn_noenroll twc_drop { 

	*No Controls or FE
	reg `var' `majors_2yr', vce(cluster inst_major)
	outreg2 using "`outfile'", `outopts' keep(`majors_2yr')

	*Controls, no FE 
	reg `var' `majors_2yr' `controls', vce(cluster inst_major)
	outreg2 using "`outfile'", `outopts' addtext(Controls, Yes) keep(`majors_2yr')

	*With HS-Cohort, Post-Sec School-Cohort
	reghdfe `var' `majors_2yr' `controls', absorb(i.last_col_2yr#i.cohort i.campus#i.cohort) vce(cluster inst_major) 
	outreg2 using "`outfile'", `outopts' addtext(Post-Sec + HS FE + Controls, Yes) keep(`majors_2yr')

} 

restore 

*Close the log opened in the set-up section
log close

/** END **/ 
